In [3]:
import pickle as pkl
import pandas as pd

with open(r"C:\Users\GUO\Documents\WeChat Files\wxid_73ihnide6ynu21\FileStorage\File\2023-09\stock_price.pkl", "rb") as f:
	object = pkl.load(f,encoding='latin1')
df = pd.DataFrame(object)
df.to_csv(r"D:\momentum_without_crush\stock_price.csv")

暂时缺少市值的数据,所以先直接用了等权重作为market_index的计算方式

In [12]:
import pandas as pd
import numpy as np

# Read the data and log-transform it
data_path = r"D:\momentum_without_crush\stock_price.csv"  # Replace this with your file path
df = pd.read_csv(data_path, parse_dates=['trade_dt'])
log_df = np.log(df.set_index('trade_dt'))

# Calculate the market index as the equally-weighted mean of all assets
market_index = log_df.mean(axis=1).values

已知$\quad \pi_0=1, \quad \pi_s=(-1)^s \prod_{i=0}^{s-1} \frac{d-i}{s !}, \quad \text { for } \quad s>0$ 利用FFT计算分数差分 $\tilde{p}_{t, i}=\sum_{s=0}^{\infty} \pi_s p_{t-s, i}$

In [13]:
from decimal import Decimal, getcontext
from pyecharts.charts import Bar, Grid, Line,Scatter, Timeline, Kline, HeatMap
from pyecharts import options as opts


getcontext().prec = 50

def calculate_weights_decimal(d, max_lag=100000):
    weights = [Decimal(1.0)]
    for k in range(1, max_lag):
        weight = -weights[-1] * Decimal((d - k + 1) / k)
        weights.append(weight)
    return weights

d_values = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
pi_s_results_decimal = {d: calculate_weights_decimal(d, len(log_df)) for d in d_values}

def fft_fractional_differencing(series, pi_s):
    n = len(series)
    m = len(pi_s)
    padded_series = np.pad(series, (0, m-1), 'constant')
    padded_pi_s = np.pad(pi_s, (0, n-1), 'constant')
    series_fft = np.fft.fft(padded_series)
    pi_s_fft = np.fft.fft(padded_pi_s)
    convolved_fft = series_fft * pi_s_fft
    convolved_series = np.fft.ifft(convolved_fft)
    return np.real(convolved_series[:n])

differenced_market_index_dict_fft_decimal = {}
for d in d_values:
    pi_s_values = np.array(pi_s_results_decimal[d], dtype=float)
    differenced_market_index = fft_fractional_differencing(market_index, pi_s_values)
    differenced_market_index_dict_fft_decimal[d] = differenced_market_index

展示了系数$\pi_s$的具体数值,我们可以看到在$d=0$和$d=1$时与不做差分处理/传统一阶差分的特性

In [14]:
max_lag_plot = 100
pi_s_plot_data = []

for d in d_values:
    pi_s_values = calculate_weights_decimal(d, max_lag=max_lag_plot)
    pi_s_values_float = [float(val) for val in pi_s_values]  # Convert to float for plotting
    pi_s_plot_data.append((f'd={d}', pi_s_values_float))

pi_s_line_chart = Line(init_opts=opts.InitOpts(width='1000px', height='600px', theme='dark'))
pi_s_line_chart.add_xaxis(list(range(max_lag_plot + 1)))  # Adding s values on x-axis

for name, y_values in pi_s_plot_data:
    pi_s_line_chart.add_yaxis(
    name, 
    y_values,
    label_opts=opts.LabelOpts(is_show=False),
    symbol="none"
)

pi_s_line_chart.set_global_opts(
    xaxis_opts=opts.AxisOpts(type_="category"),
    tooltip_opts=opts.TooltipOpts(is_show=True, trigger="axis"),
    datazoom_opts=[opts.DataZoomOpts(range_start=10, range_end=80)],

)


pi_s_line_chart.render_notebook()
Out[14]:

分数差分后的结果,d = 0为只做了对数化处理,d = 1为一阶差分,可以看到随着差分阶数更接近1,数据在保留相应记忆性的前提下平稳性更好了

In [16]:
plot_data_optimized = []
date_range = df['trade_dt'].astype(str).tolist()  

for d in d_values:
    plot_data_optimized.append((f'd={d}', differenced_market_index_dict_fft_decimal[d].tolist()))

line_chart = Line(init_opts=opts.InitOpts(width='1000px', height='600px', theme = 'dark'))
line_chart.add_xaxis(date_range)

for name, y_values in plot_data_optimized:
    line_chart.add_yaxis(
        name, 
        y_values,
        label_opts=opts.LabelOpts(is_show=False),
    )

line_chart.set_global_opts(
    xaxis_opts=opts.AxisOpts(type_="category"),
    datazoom_opts=[opts.DataZoomOpts(range_start=10, range_end=80)],
    tooltip_opts=opts.TooltipOpts(is_show=True, trigger="axis")
)

# Render the chart
line_chart.render_notebook()
Out[16]:

we express the predicted return as a weighted sum of all available past log-prices $\widehat{r}_{T+1}=\sum_{u=1}^T w_u p_u$, where: $$ w_u= \begin{cases}\left(\frac{1}{\tau} \sum_{s=T-u-\tau+1}^{T-u} \pi_s\right)-\pi_{T-u+1}, & \text { for } u=1, \ldots, T-\tau \\ \left(\frac{1}{\tau} \sum_{s=0}^{T-u} \pi_s\right)-\pi_{T-u+1}, & \text { for } u=T-\tau+1, \ldots, T-1 \\ \frac{\pi_0}{\tau}-\pi_1-1, & \text { for } u=T\end{cases} $$

Therefore, allows us to approximate the fractional momentum as: $$ \widehat{r}_{T+1} \approx-\frac{w_{T-\tau}}{\tau-1} \sum_{u=T-\tau+1}^{T-1}\left(p_u-p_{T-\tau}\right)+\sum_{u=T-\tau+1}^{T-1} \widetilde{w}_u\left(p_u-p_T\right), $$ where $\widetilde{w}_u=w_u+\left(w_{T-\tau}\right) /(\tau-1)$.

作者核心思路上认为分数动量被分成了两个部分,$\sum_{u=T-\tau+1}^{T-1}\left(p_u-p_{T-\tau}\right)$代表的动量信号,$\sum_{u=T-\tau+1}^{T-1}\left(p_u-p_T\right)$代表的反转信号,并且在后续针对系数$\widetilde{w}_u, w_u$的计算中,注意到动量信号的权重为正,反转信号的权重随着滞后的增加而迅速下降。

In [17]:
def calculate_w_u_modified(T, tau, pi_s):
    pi_s = np.array([float(x) for x in pi_s])  # Convert Decimal to float
    w_u_values = np.zeros(T)
    
    for u in range(1, T - tau + 1):
        w_u_values[u-1] = (1 / tau) * np.sum(pi_s[T-u-tau : T-u]) - pi_s[T-u]
    
    for u in range(T - tau + 1, T):
        w_u_values[u-1] = (1 / tau) * np.sum(pi_s[:T-u]) - pi_s[T-u]
    
    w_u_values[-1] = pi_s[0] / tau - pi_s[1] - 1
    
    return w_u_values


T = len(log_df)
tau = 250

w_u_results_direct_modified = {}
for d in d_values:
    pi_s_values = pi_s_results_decimal[d]
    w_u_values = calculate_w_u_modified(T, tau, pi_s_values)
    w_u_results_direct_modified[d] = w_u_values

这里给出了衡量过去价格影响 $\widehat{r}_{T+1}=\sum_{u=1}^T w_u p_u$的系数$w_u$值的变化,

In [18]:
date_range = range(T-tau, T)
line_chart = Line(init_opts=opts.InitOpts(width='1000px', height='600px', theme='dark'))
line_chart.add_xaxis(date_range)

for d, y_values in w_u_results_direct_modified.items():
    line_chart.add_yaxis(
        str(d),
        y_values[T-tau: ],
        label_opts=opts.LabelOpts(is_show=False)
    )

line_chart.set_global_opts(
    xaxis_opts=opts.AxisOpts(type_="category"),
    datazoom_opts=[opts.DataZoomOpts(range_start=10, range_end=80)],
    tooltip_opts=opts.TooltipOpts(is_show=True, trigger="axis")
)

line_chart.render_notebook()
Out[18]:

类似的给出了$\widetilde{w}_u$的系数变化,我们可以注意到其确实有衰减效应,这和我们对反转效应是一个相对短期影响的认识相符合

In [20]:
def calculate_tilde_w_u(T, tau, pi_s_values):
    w_u_values = calculate_w_u_modified(T, tau, pi_s_values)
    
    w_T_minus_tau = w_u_values[T-tau]  
    tilde_w_u_values = [w_u + w_T_minus_tau / (tau - 1) for w_u in w_u_values]
    
    return tilde_w_u_values

tilde_w_u_results = {}
for d in d_values:
    pi_s_values = pi_s_results_decimal[d]  # Assuming you've calculated pi_s_values already
    tilde_w_u_values = calculate_tilde_w_u(T, tau, pi_s_values)
    tilde_w_u_results[d] = tilde_w_u_values


line_chart_tilde = Line(init_opts=opts.InitOpts(width='1000px', height='600px', theme='dark'))
line_chart_tilde.add_xaxis(date_range)

for d, y_values in tilde_w_u_results.items():
    line_chart_tilde.add_yaxis(
        str(d),
        y_values[T-tau: T - 1],
        label_opts=opts.LabelOpts(is_show=False)
    )

line_chart_tilde.set_global_opts(
    xaxis_opts=opts.AxisOpts(type_="category"),
    datazoom_opts=[opts.DataZoomOpts(range_start=10, range_end=80)],
    tooltip_opts=opts.TooltipOpts(is_show=True, trigger="axis")
)

line_chart_tilde.render_notebook()
Out[20]:

计算$\widehat{r}_{t}$,也即我们的因子 $$ \widehat{r}_{T+1} \approx-\frac{w_{T-\tau}}{\tau-1} \sum_{u=T-\tau+1}^{T-1}\left(p_u-p_{T-\tau}\right)+\sum_{u=T-\tau+1}^{T-1} \widetilde{w}_u\left(p_u-p_T\right) + w_Tp_T, $$ 后续策略将会围绕它展开,针对不同的d和每一只股票都进行了计算

In [31]:
import numpy as np

def calculate_r_hat_optimized(series, w_u_values, tilde_w_u_values, tau):
    T = len(series)
    r_hat = np.zeros(T)
    
    cum_sum_series = np.cumsum(series)
        
    factor_first_term = -w_u_values[tau:T] / (tau - 1)
    
    sum_series_first_term = cum_sum_series[tau - 1:T - 1] - cum_sum_series[:T - tau]
    sum_series_first_term += series[:T - tau]  # add back the subtracted terms
    
    # Vectorized calculation for second term
    sum_series_second_term = np.array([
        np.dot(tilde_w_u_values[t - tau + 1:t], series[t - tau + 1:t] - series[t]) for t in range(tau, T)
    ])
    
    r_hat[tau:T] = factor_first_term * sum_series_first_term + sum_series_second_term
        
    return r_hat

r_hat_dict_optimized = {}

# Loop over each d value
for d in d_values:
    print(f"Calculating for d = {d}...")
    
    w_u_values = w_u_results_direct_modified[d]
    
    r_hat_dict_optimized[d] = {}
    
    # Loop over each stock in the dataframe
    for stock in df.columns[1:]:  # Skip the 'trade_dt' column
        prices = df[stock].dropna().values
        
        if len(prices) < tau:
            continue
        
        r_hat_values = calculate_r_hat_optimized(prices, w_u_values, tilde_w_u_values, tau)
        
        r_hat_dict_optimized[d][stock] = r_hat_values
Calculating for d = 0...
Calculating for d = 0.1...
Calculating for d = 0.2...
Calculating for d = 0.3...
Calculating for d = 0.4...
Calculating for d = 0.5...
Calculating for d = 0.6...
Calculating for d = 0.7...
Calculating for d = 0.8...
Calculating for d = 0.9...
Calculating for d = 1...
In [32]:
max_length = max(max(len(arr) for arr in stock_dict.values()) for stock_dict in r_hat_dict_optimized.values())

# Pad each r_hat array with NaNs to match the maximum length
for d, stock_dict in r_hat_dict_optimized.items():
    for stock, r_hat_array in stock_dict.items():
        padded_array = np.pad(r_hat_array, (0, max_length - len(r_hat_array)), 'constant', constant_values=np.nan)
        r_hat_dict_optimized[d][stock] = padded_array

# Get the time index from log_df and pad it with NaNs to match the maximum length
time_index = log_df.index.to_numpy()
padded_time_index = np.pad(time_index, (0, max_length - len(time_index)), 'constant', constant_values=np.nan)

# Save each r_hat DataFrame as a CSV, including the time column
for d, r_hat_data in r_hat_dict_optimized.items():
    r_hat_df = pd.DataFrame(r_hat_data)
    r_hat_df['Time'] = padded_time_index  # Add the time column
    r_hat_df = r_hat_df[['Time'] + [col for col in r_hat_df.columns if col != 'Time']]  # Reorder columns to put 'Time' first
    r_hat_df.to_csv(f"r_hat_values_for_d_{d}.csv", index=False)

夏普比:投资者风险每增加一单位,对应的超额收益增加多少

最大回撤:可能发生亏损的最大幅度(这个指标和策略的周期高度相关——不会真有人天天换股票还可能亏20%吧)

IC:股票在调仓周期期初排名和调仓周期期末收益排名的线性相关性(0.5)

IR:IC的多周期均值/IC的标准方差,代表因子获取稳定超额收益的能力(在装B的人嘴里叫$\alpha$),一般以0.05为界限

rankIC:用了秩相关系数,用排名之间的相关性规避了可能单纯计算相关性带来的数值问题

t值:判断因子针对收益是否显著

In [1]:
import pandas as pd
import numpy as np
In [2]:
from scipy.stats import spearmanr

def calculate_metrics_from_capital(capital_over_time, trade_frequency, annual_trading_days=252):
    # Calculate portfolio returns from capital_over_time
    portfolio_returns = np.diff(capital_over_time) / capital_over_time[:-1]
    
    # Annualization factor
    annualization_factor = annual_trading_days / trade_frequency
    
    # Metrics calculation
    annualized_return = np.mean(portfolio_returns) * annualization_factor
    annualized_volatility = np.std(portfolio_returns) * np.sqrt(annualization_factor)
    sharpe_ratio = annualized_return / annualized_volatility  # Assuming risk-free rate is 0
    
    max_accumulated = np.maximum.accumulate(capital_over_time)
    drawdowns = 1 - capital_over_time / max_accumulated
    max_drawdown = -np.max(drawdowns)
    
    spearman_r, _ = spearmanr(portfolio_returns[:-1], portfolio_returns[1:])
    ic = spearman_r
    icir = ic / np.std(portfolio_returns)  # Using standard ICIR formula
    
    t_stat = (np.mean(portfolio_returns) / np.std(portfolio_returns)) * np.sqrt(len(portfolio_returns))
    rank_ic, _ = spearmanr(portfolio_returns[:-1], np.diff(np.cumprod(1 + np.array(portfolio_returns)) - 1))

    winning_periods = np.sum(portfolio_returns > 0)
    total_periods = len(portfolio_returns)
    win_rate = winning_periods / total_periods
    
    metrics = pd.DataFrame({
        'Indicator': ['Annualized Return', 'Annualized Volatility', 'Sharpe Ratio', 'Max Drawdown', 'IC', 'ICIR', 't-statistic', 'Rank IC', 
        'Win_rate'],
        'Value': [annualized_return, annualized_volatility, sharpe_ratio, max_drawdown, ic, icir, t_stat, rank_ic, win_rate]
    })
    
    return metrics

模拟了一个分多头/空头组的策略

首先确定调仓频率(周/月)

之后根据因子值进行排名,取前$quantile_1$作为多头组,后$quantile_2$作为空头组,多头组在周期开始时买入股票,结束时卖出;空头组在周期开始时以“借买股票需要的前”的方式“买入”,结束时还对方相应的股票(所以需要预期下跌才会有收益)

依据这样的多空策略框架进行操作,具体有一个script比较详细的给了每一步买什么收益率如何

In [3]:
# 定义模拟交易函数
def run_simulation(merged_data, initial_capital, strategy_mode, trading_days_in_cycle, investment_fraction, shorting_fraction):
    if strategy_mode == 'both':
        ratio_for_long = 0.5
    elif strategy_mode == 'long':
        ratio_for_long = 1.0
    elif strategy_mode == 'short':
        ratio_for_long = 0.0
    else:
        raise ValueError("Invalid strategy_mode. Choose 'long', 'short', or 'both'")

    long_capital = initial_capital * ratio_for_long 
    short_capital = initial_capital - long_capital
    capital = initial_capital

    long_capital_over_time = [long_capital]
    short_capital_over_time = [short_capital]
    capital_over_time = [capital]
    time_over_time = [merged_data['Date'].iloc[0]]  # Initialize with the first date

    for i in range(0, len(merged_data), trading_days_in_cycle):
        cycle_data = merged_data.iloc[i:i+trading_days_in_cycle]
        
        if len(cycle_data) < trading_days_in_cycle:
            break
        
        
        first_day_data = cycle_data.iloc[0]
        last_day_data = cycle_data.iloc[-1]
        time_over_time.append(last_day_data['Date'])  # Add the last date of this cycle

        stock_r_hat_values = first_day_data.filter(like='_r_hat')
        stock_r_hat_values_sorted = stock_r_hat_values.sort_values(ascending=False)
        
        num_stocks_to_invest = int(len(stock_r_hat_values_sorted) * investment_fraction)
        num_stocks_to_short = int(len(stock_r_hat_values_sorted) * shorting_fraction)
        
        selected_stocks = stock_r_hat_values_sorted.index[:num_stocks_to_invest]
        shorted_stocks = stock_r_hat_values_sorted.index[-num_stocks_to_short:]
        
        selected_stock_prices_first_day = first_day_data[selected_stocks.str.replace('_r_hat', '_price')]
        selected_stock_prices_last_day = last_day_data[selected_stocks.str.replace('_r_hat', '_price')]
        shorted_stock_prices_first_day = first_day_data[shorted_stocks.str.replace('_r_hat', '_price')]
        shorted_stock_prices_last_day = last_day_data[shorted_stocks.str.replace('_r_hat', '_price')]
        
        amount_per_long_stock = long_capital / num_stocks_to_invest if num_stocks_to_invest > 0 else 0
        amount_per_shorted_stock = short_capital / num_stocks_to_short if num_stocks_to_short > 0 else 0
        
        shares_bought = amount_per_long_stock / selected_stock_prices_first_day
        amount_after_selling = shares_bought * selected_stock_prices_last_day
        long_profit_loss = amount_after_selling - amount_per_long_stock
        
        shares_shorted = amount_per_shorted_stock / shorted_stock_prices_first_day
        amount_after_covering = shares_shorted * shorted_stock_prices_last_day
        short_profit_loss = amount_per_shorted_stock - amount_after_covering

        long_capital = np.sum(amount_after_selling)
        short_capital += np.sum(short_profit_loss)
        
        capital = long_capital + short_capital
        
        long_capital_over_time.append(long_capital)
        short_capital_over_time.append(short_capital)
        capital_over_time.append(capital)

    final_capital = capital_over_time[-1]

    capital_time_df = pd.DataFrame({
        'Date': pd.to_datetime(time_over_time),  
        'Capital': capital_over_time
    })

    return final_capital, capital_over_time, long_capital_over_time, short_capital_over_time, capital_time_df
In [4]:
from collections import defaultdict

# Function to calculate annual performance metrics based on capital over time
def calculate_annual_performance(capital_over_time,long_capital_over_time, short_capital_over_time, trading_days_in_cycle):
    # Initialize variables
    annual_performance = defaultdict(lambda: defaultdict(float))
    num_cycles_per_year = 252 // trading_days_in_cycle  # Approximate number of cycles in a trading year

    # Loop through each year and calculate performance metrics
    for year_start in range(0, len(capital_over_time), num_cycles_per_year):
        year_end = min(year_start + num_cycles_per_year, len(capital_over_time) - 1)
        year_capital_data = capital_over_time[year_start:year_end + 1]
        year_long_capital_data = long_capital_over_time[year_start:year_end + 1]
        year_short_capital_data = short_capital_over_time[year_start:year_end + 1]

        # Skip if not enough data for the year
        if len(year_capital_data) < 2:
            continue

        # Calculate metrics
        start_capital = year_capital_data[0]
        end_capital = year_capital_data[-1]
        annual_return = (end_capital / start_capital - 1) * 100  # in percentage

        start_long_capital = year_long_capital_data[0]
        end_long_capital = year_long_capital_data[-1]
        annual_long_return = (end_long_capital / start_long_capital - 1) * 100  # in percentage
        
        # For the short capital
        start_short_capital = year_short_capital_data[0]
        end_short_capital = year_short_capital_data[-1]
        annual_short_return = (end_short_capital / start_short_capital - 1) * 100  # in percentage

        # Calculating Max Drawdown correctly based on cummax
        running_max = np.maximum.accumulate(year_capital_data)
        drawdowns = 1 - (year_capital_data / running_max)
        max_drawdown = -np.max(drawdowns) * 100  # in percentage

        year = year_start // num_cycles_per_year + 2012  # Starting from Year 1 (2012 in this case)
        annual_performance[year]['Annual Return'] = annual_return
        annual_performance[year]['Long Annual Return'] = annual_long_return
        annual_performance[year]['Short Annual Return'] = annual_short_return
        annual_performance[year]['Max Drawdown'] = max_drawdown

    negative_return_years = []
    for year, metrics in annual_performance.items():
        if metrics['Annual Return'] < 0:
            negative_return_years.append(year)
            
    return pd.DataFrame.from_dict(annual_performance, orient='index'), negative_return_years
In [96]:
import os

def merge_r_hat_and_stock_price(r_hat_folder_path, stock_price_file_path, start_date):
    """
    Merge r_hat data and stock_price data from the specified folder and file path.

    Parameters:
        - r_hat_folder_path: The folder containing CSV files of r_hat data for various d values.
        - stock_price_file_path: The file path for the stock_price data CSV file.

    Returns:
        - A dictionary where keys are the d values and values are the merged DataFrames for each d value.
    """
    stock_price_data = pd.read_csv(stock_price_file_path)
    stock_price_columns = set(stock_price_data.columns)
    
    merged_data_dict = {}

    for filename in os.listdir(r_hat_folder_path):
        if filename.endswith(".csv"):
            d_value = float(filename.split("_")[-1].replace(".csv", "").replace("d", ""))
            
            r_hat_data = pd.read_csv(os.path.join(r_hat_folder_path, filename))
            r_hat_columns = set(r_hat_data.columns)
            
            common_columns = r_hat_columns.intersection(stock_price_columns)
            common_stock_columns = common_columns - {'Time', 'trade_dt'}
            
            filtered_r_hat_data = r_hat_data[['Time'] + list(common_stock_columns)]
            filtered_stock_price_data = stock_price_data[['trade_dt'] + list(common_stock_columns)]
            
            filtered_r_hat_data = filtered_r_hat_data.rename(columns={"Time": "Date"})
            filtered_stock_price_data = filtered_stock_price_data.rename(columns={"trade_dt": "Date"})
            
            merged_data = pd.merge(filtered_r_hat_data, filtered_stock_price_data, on="Date", suffixes=('_r_hat', '_price'))
            if start_date is not None:
                merged_data = merged_data[merged_data['Date'] >= start_date]
    
            
            merged_data_dict[d_value] = merged_data

    return merged_data_dict


merged_data_dict = merge_r_hat_and_stock_price(r"C:\Users\GUO\hello\.venv\quant\momentum_without_crash", r"D:\momentum_without_crush\stock_price.csv", '2001-02-17')
In [97]:
# 遍历字典中的每一个 DataFrame
for key in merged_data_dict:
    df = merged_data_dict[key]
    
    # 对于每一个 '_r_hat' 列
    # Correctly align the '_r_hat' columns with their corresponding '_price' columns based on the new criteria
    for col in df.columns:
        if '_r_hat' in col:
            # Find the corresponding '_price' column
            price_col = col.replace('_r_hat', '_price')
            
            # Check if the corresponding '_price' column exists
            if price_col in df.columns:
                # Check if '_r_hat' column is not all NaN
                if not df[col].isna().all():
                    # Find the last valid index for '_price' column
                    price_last_valid = df[price_col].last_valid_index()
                    
                    # Set the second last valid value of '_r_hat' to align with the last valid value of '_price'
                    if price_last_valid is not None:
                        r_hat_second_last_valid = df[col].iloc[:price_last_valid].last_valid_index() - 1
                        
                        # Perform the shift to align
                        shift_rows = price_last_valid - r_hat_second_last_valid
                        df[col] = df[col].shift(shift_rows)
    # 更新字典中的 DataFrame
    merged_data_dict[key] = df
In [99]:
for key, data in merged_data_dict.items():
    filtered_data = data[data['Date'] >= '2012-03-04']
    r_hat_columns = [col for col in filtered_data.columns if '_r_hat' in col]
    
    for r_hat_col in r_hat_columns:
        price_col = r_hat_col.replace('_r_hat', '_price')
        
        if price_col in filtered_data.columns:
            # 使用向量化操作进行计算
            indices = filtered_data.index[::-1]
            shifted_price = filtered_data[price_col].shift(-1).reindex(indices)
            adjusted_w_u_values = w_u_values[T - indices]
            filtered_data.loc[indices, r_hat_col] += shifted_price * adjusted_w_u_values
    
    merged_data_dict[key] = filtered_data
In [64]:
merged_data_dict[0.1].to_csv(r"D:\momentum_without_crush\merged.csv", index = False)
In [ ]:
import warnings

# 忽略特定类型的警告
warnings.filterwarnings("ignore", category=RuntimeWarning)

def grid_search_per_d(merged_data_dict, initial_capital=1, trading_days_in_cycle=21, shorting_fraction= 0.1):
    investment_fraction_values = [0.025, 0.1, 0.2, 0.4, 0.45, 0.5]
    
    best_params_per_d = {}
    
    for d, merged_data in merged_data_dict.items():
        best_params = None
        best_negative_years = float('inf')
        best_final_capital = 0
        
        for investment_fraction in investment_fraction_values:

            final_capital, capital_over_time, long_capital_over_time, short_capital_over_time,capital_time_df = run_simulation(
                merged_data, initial_capital, 'long', trading_days_in_cycle, investment_fraction, shorting_fraction
            )

            if  (final_capital > best_final_capital):
                best_params = {'investment_fraction': investment_fraction, 'shorting_fraction': shorting_fraction}
                best_final_capital = final_capital
                print(f"New best params for d = {d}: {best_params} Final capital: {best_final_capital}")
        
        best_params_per_d[d] = best_params

    return best_params_per_d

# Run the grid search
best_params_per_d = grid_search_per_d(merged_data_dict)
print(f"Best params per d: {best_params_per_d}")
New best params for d = 0.1: {'investment_fraction': 0.025, 'shorting_fraction': 0.1} Final capital: 3.9742772112368465
New best params for d = 0.1: {'investment_fraction': 0.1, 'shorting_fraction': 0.1} Final capital: 4.749086124684469
New best params for d = 0.1: {'investment_fraction': 0.2, 'shorting_fraction': 0.1} Final capital: 6.644234861006474
New best params for d = 0.2: {'investment_fraction': 0.025, 'shorting_fraction': 0.1} Final capital: 3.9723027610897716
New best params for d = 0.2: {'investment_fraction': 0.1, 'shorting_fraction': 0.1} Final capital: 4.7641340823950555
New best params for d = 0.2: {'investment_fraction': 0.2, 'shorting_fraction': 0.1} Final capital: 6.694696256387405
New best params for d = 0.3: {'investment_fraction': 0.025, 'shorting_fraction': 0.1} Final capital: 3.876025543395229
New best params for d = 0.3: {'investment_fraction': 0.1, 'shorting_fraction': 0.1} Final capital: 4.747151252055097
New best params for d = 0.3: {'investment_fraction': 0.2, 'shorting_fraction': 0.1} Final capital: 6.669716549783102
New best params for d = 0.4: {'investment_fraction': 0.025, 'shorting_fraction': 0.1} Final capital: 3.8769850366903307
New best params for d = 0.4: {'investment_fraction': 0.1, 'shorting_fraction': 0.1} Final capital: 4.78378981934201
New best params for d = 0.4: {'investment_fraction': 0.2, 'shorting_fraction': 0.1} Final capital: 6.651222007161921
New best params for d = 0.5: {'investment_fraction': 0.025, 'shorting_fraction': 0.1} Final capital: 3.8798148308301714
New best params for d = 0.5: {'investment_fraction': 0.1, 'shorting_fraction': 0.1} Final capital: 4.867453454533341
New best params for d = 0.5: {'investment_fraction': 0.2, 'shorting_fraction': 0.1} Final capital: 6.612224528010142
New best params for d = 0.6: {'investment_fraction': 0.025, 'shorting_fraction': 0.1} Final capital: 3.860321132257489
New best params for d = 0.6: {'investment_fraction': 0.1, 'shorting_fraction': 0.1} Final capital: 4.940407643837893
New best params for d = 0.6: {'investment_fraction': 0.2, 'shorting_fraction': 0.1} Final capital: 6.676211523812558
New best params for d = 0.7: {'investment_fraction': 0.025, 'shorting_fraction': 0.1} Final capital: 3.892761680099581
New best params for d = 0.7: {'investment_fraction': 0.1, 'shorting_fraction': 0.1} Final capital: 4.923533858267651
New best params for d = 0.7: {'investment_fraction': 0.2, 'shorting_fraction': 0.1} Final capital: 6.762687559228131
New best params for d = 0.8: {'investment_fraction': 0.025, 'shorting_fraction': 0.1} Final capital: 3.9792340808396216
New best params for d = 0.8: {'investment_fraction': 0.1, 'shorting_fraction': 0.1} Final capital: 5.071500254321456
New best params for d = 0.8: {'investment_fraction': 0.2, 'shorting_fraction': 0.1} Final capital: 6.786275684976067
New best params for d = 0.9: {'investment_fraction': 0.025, 'shorting_fraction': 0.1} Final capital: 4.012778335260196
New best params for d = 0.9: {'investment_fraction': 0.1, 'shorting_fraction': 0.1} Final capital: 5.117874611179349
New best params for d = 0.9: {'investment_fraction': 0.2, 'shorting_fraction': 0.1} Final capital: 6.83048616722581
New best params for d = 0.0: {'investment_fraction': 0.025, 'shorting_fraction': 0.1} Final capital: 11.233726414297216
New best params for d = 0.0: {'investment_fraction': 0.1, 'shorting_fraction': 0.1} Final capital: 11.26001005277254
New best params for d = 0.0: {'investment_fraction': 0.2, 'shorting_fraction': 0.1} Final capital: 11.952328261125679
New best params for d = 1.0: {'investment_fraction': 0.025, 'shorting_fraction': 0.1} Final capital: 10.090359309849847
New best params for d = 1.0: {'investment_fraction': 0.1, 'shorting_fraction': 0.1} Final capital: 11.319795698828981
New best params for d = 1.0: {'investment_fraction': 0.2, 'shorting_fraction': 0.1} Final capital: 12.051178843402448
Best params per d: {0.1: {'investment_fraction': 0.2, 'shorting_fraction': 0.1}, 0.2: {'investment_fraction': 0.2, 'shorting_fraction': 0.1}, 0.3: {'investment_fraction': 0.2, 'shorting_fraction': 0.1}, 0.4: {'investment_fraction': 0.2, 'shorting_fraction': 0.1}, 0.5: {'investment_fraction': 0.2, 'shorting_fraction': 0.1}, 0.6: {'investment_fraction': 0.2, 'shorting_fraction': 0.1}, 0.7: {'investment_fraction': 0.2, 'shorting_fraction': 0.1}, 0.8: {'investment_fraction': 0.2, 'shorting_fraction': 0.1}, 0.9: {'investment_fraction': 0.2, 'shorting_fraction': 0.1}, 0.0: {'investment_fraction': 0.2, 'shorting_fraction': 0.1}, 1.0: {'investment_fraction': 0.2, 'shorting_fraction': 0.1}}
In [101]:
initial_capital = 1
trading_days_in_cycle = 21

final_capital, capital_over_time, long_capital_over_time, short_capital_over_time, capital_time_df = run_simulation(
    merged_data_dict[0], initial_capital, 'long', trading_days_in_cycle,0.006,0.1)
final_capital
Out[101]:
4.400402015355775
In [102]:
metrics = calculate_metrics_from_capital(capital_over_time, trading_days_in_cycle , annual_trading_days=252)
metrics
Out[102]:
Indicator Value
0 Annualized Return 0.177262
1 Annualized Volatility 0.285742
2 Sharpe Ratio 0.620356
3 Max Drawdown -0.480625
4 IC 0.049569
5 ICIR 0.600930
6 t-statistic 2.065267
7 Rank IC 0.091733
8 Win_rate 0.593985
In [103]:
annual_performance_sample = calculate_annual_performance(capital_over_time,long_capital_over_time, short_capital_over_time, trading_days_in_cycle)
annual_performance_sample
Out[103]:
(      Annual Return  Long Annual Return  Short Annual Return  Max Drawdown
 2012      -6.596295           -6.596295                  NaN    -16.398647
 2013      16.660401           16.660401                  NaN    -16.503268
 2014     113.283983          113.283983                  NaN     -6.177381
 2015      -8.976034           -8.976034                  NaN    -47.555449
 2016       5.181663            5.181663                  NaN    -12.990307
 2017       4.205690            4.205690                  NaN    -18.069958
 2018     -15.720422          -15.720422                  NaN    -30.008132
 2019      34.590760           34.590760                  NaN     -3.318545
 2020      24.020836           24.020836                  NaN     -7.447464
 2021      35.662192           35.662192                  NaN    -17.262459
 2022      10.827545           10.827545                  NaN     -9.568065
 2023     -10.273541          -10.273541                  NaN    -10.273541,
 [2012, 2015, 2018, 2023])
In [104]:
import pandas as pd

hs300_data = pd.read_excel(r"D:\wind\沪深300指数.xlsx")
hs300_data.columns = ['Date', '沪深300指数']
hs300_data['Date'] = pd.to_datetime(hs300_data['Date'])
merged_df = pd.merge(capital_time_df, hs300_data, on='Date', how='inner')
merged_df['Normalized_沪深300指数'] = merged_df['沪深300指数'] / merged_df['沪深300指数'].iloc[0]
In [105]:
merged_df['超额收益'] = merged_df['Capital'] / merged_df['Normalized_沪深300指数']
merged_df
Out[105]:
Date Capital 沪深300指数 Normalized_沪深300指数 超额收益
0 2012-03-05 1.000000 2662.6980 1.000000 1.000000
1 2012-04-05 0.923819 2512.8320 0.943716 0.978916
2 2012-05-08 0.967525 2709.1160 1.017433 0.950948
3 2012-06-06 0.941296 2557.4010 0.960455 0.980053
4 2012-07-06 0.926490 2472.6140 0.928612 0.997715
... ... ... ... ... ...
129 2023-04-24 5.087475 3982.6429 1.495717 3.401362
130 2023-05-26 5.091094 3850.9511 1.446259 3.520181
131 2023-06-28 5.081716 3840.7986 1.442446 3.522985
132 2023-07-27 4.904241 3902.3458 1.465561 3.346324
133 2023-08-25 4.400402 3709.1517 1.393005 3.158928

134 rows × 5 columns

In [106]:
dates = merged_df['Date'].astype(str).tolist()
capital = merged_df['Capital'].tolist()
hs300 = merged_df['Normalized_沪深300指数'].tolist()
compared = merged_df['超额收益'].tolist()

# Initialize line chart
line = Line(init_opts=opts.InitOpts(theme='dark'))
line.add_xaxis(dates)

# Add lines
line.add_yaxis("Strategy Capital", capital, label_opts=opts.LabelOpts(is_show=False))
line.add_yaxis("沪深300指数", hs300, label_opts=opts.LabelOpts(is_show=False))
line.add_yaxis("超额收益", compared, label_opts=opts.LabelOpts(is_show=False))

# Add configuration
line.set_global_opts(
    xaxis_opts=opts.AxisOpts(type_="category"),
    datazoom_opts=[opts.DataZoomOpts(range_start=0, range_end=100)],
    tooltip_opts=opts.TooltipOpts(is_show=True, trigger="axis")

)

# Render the chart
line.render_notebook()
Out[106]:
In [44]:
line.render()
Out[44]:
'c:\\Users\\GUO\\hello\\.venv\\quant\\momentum_without_crash\\render.html'